Introduction

The aim is to characterize the human fetal kidney from the kidney cell atlas. You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [1] The rds data can be download using the download link: https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds. The reference was downloaded and created in the script scripts/prepare-fetal-references.R.

Packages

library(Seurat)
library(SCpubr)
library(tidyverse)
library(patchwork)

set.seed(params$seed)
options(future.globals.maxSize = 891289600000)

Base directories

# The base path for the OpenScPCA repository, found by its (hidden) .git directory
repository_base <- rprojroot::find_root(rprojroot::is_git_root)

# The path to this module
module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06")

Input files

The input file ../scratch/fetal_kidney.rds is the output of the script prepare-fetal-references.R.

Output file

We will save the result of the differential expression analysis in results/references/00b_marker_genes_fetal_kidney_Stewart.csv. Notebook is saved in the notebook/00-reference directory.

path_to_output <- file.path(module_base, "results", "references")

Analysis

Load the reference

fetal_kidney <- readRDS(params$fetal_kidney_path)

d1 <- do_DimPlot(fetal_kidney, reduction = "umap", dims = c(1, 2), group.by = "compartment", label = TRUE, repel = TRUE) + NoLegend()
d2 <- do_DimPlot(fetal_kidney, reduction = "umap", dims = c(1, 2), group.by = "cell_type", label = TRUE, repel = TRUE) + NoLegend()

d1 | d2

Characterization of compartment and cell types in the reference

Here, we use an unbiased approach to find transcripts that characterized the different compartments and cell types.

This is just to get markers genes of the different population, in case some could be of interest for the Wilms tumor annotations.

We run DElegate::FindAllMarkers2() to find markers of the different clusters and manually check if they do make sense. DElegate::FindAllMarkers2() is an improved version of Seurat::FindAllMarkers() based on pseudobulk differential expression method. Please check the preprint from Hafemeister and Halbritter: https://www.biorxiv.org/content/10.1101/2023.03.28.534443v2 and tool described here: https://github.com/cancerbits/DElegate

Find marker genes for each of the compartment

de_results <- DElegate::FindAllMarkers2(fetal_kidney, group_column = "compartment", )
## Warning in size + sum(size_args, na.rm = FALSE): NAs produced by integer
## overflow
# filter the most relevant markers
s.markers <- de_results[de_results$padj < params$padj_threshold & de_results$log_fc > params$lfc_threshold & de_results$rate1 > params$rate1_threshold, ]

DT::datatable(s.markers,
  caption = ("marker genes"),
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel")
  )
)
# Select top 5 genes for heatmap plotting
s.markers <- na.omit(s.markers)
s.markers %>%
  group_by(group1) %>%
  top_n(n = 5, wt = log_fc) -> top5

# subset for plotting
Idents(fetal_kidney) <- fetal_kidney$compartment
cells <- WhichCells(fetal_kidney, downsample = 100)
ss <- subset(fetal_kidney, cells = cells)
ss <- ScaleData(ss, features = top5$feature)

p1 <- SCpubr::do_DimPlot(fetal_kidney, reduction = "umap", group.by = "compartment", label = TRUE, repel = TRUE) + ggtitle("compartment")
p2 <- DoHeatmap(ss, features = top5$feature, cells = cells, group.by = "compartment") + NoLegend() +
  scale_fill_gradientn(colors = c("#01665e", "#35978f", "darkslategray3", "#f7f7f7", "#fee391", "#fec44f", "#F9AD03"))
p3 <- ggplot(fetal_kidney@meta.data, aes(compartment, fill = compartment)) +
  geom_bar() +
  NoLegend()


common_title <- sprintf("Unsupervised clustering %s, %d cells", fetal_kidney@meta.data$orig.ident[1], ncol(fetal_kidney))
show((((p1 / p3) + plot_layout(heights = c(3, 2)) | p2)) + plot_layout(widths = c(1, 2)) + plot_layout(heights = c(3, 1)) + plot_annotation(title = common_title))

write_csv(de_results, file = file.path(path_to_output, "00a_marker_compartment_fetal_kidney_Stewart.csv"))

Find marker genes for each of the cell types

de_results <- DElegate::FindAllMarkers2(fetal_kidney, group_column = "cell_type")
## Warning in size + sum(size_args, na.rm = FALSE): NAs produced by integer
## overflow
# filter the most relevant markers
s.markers <- de_results[de_results$padj < params$padj_threshold & de_results$log_fc > params$lfc_threshold & de_results$rate1 > params$rate1_threshold, ]


DT::datatable(s.markers,
  caption = ("marker genes"),
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel")
  )
)
# Select top 5 genes for heatmap plotting
s.markers <- na.omit(s.markers)
s.markers %>%
  group_by(group1) %>%
  top_n(n = 5, wt = log_fc) -> top5

# subset for plotting
Idents(fetal_kidney) <- fetal_kidney$cell_type
cells <- WhichCells(fetal_kidney, downsample = 100)
ss <- subset(fetal_kidney, cells = cells)
ss <- ScaleData(ss, features = top5$feature)

p1 <- SCpubr::do_DimPlot(fetal_kidney, reduction = "umap", group.by = "cell_type", label = TRUE, repel = TRUE) + ggtitle("cell_type") + NoLegend()
p2 <- DoHeatmap(ss, features = top5$feature, cells = cells, group.by = "cell_type") + NoLegend() +
  scale_fill_gradientn(colors = c("#01665e", "#35978f", "darkslategray3", "#f7f7f7", "#fee391", "#fec44f", "#F9AD03"))
p3 <- ggplot(fetal_kidney@meta.data, aes(cell_type, fill = cell_type)) +
  geom_bar() +
  NoLegend() +
  scale_x_discrete(guide = guide_axis(angle = 90))


common_title <- sprintf("Unsupervised clustering %s, %d cells", fetal_kidney@meta.data$orig.ident[1], ncol(fetal_kidney))
show((((p1 / p3) + plot_layout(heights = c(3, 2)) | p2)) + plot_layout(widths = c(1, 1)) + plot_layout(heights = c(3, 1)) + plot_annotation(title = common_title))

write_csv(de_results, file = file.path(path_to_output, "00a_marker_cell-type_fetal_kidney_Stewart.csv"))

Session info

sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.2.0    lubridate_1.9.3    forcats_1.0.0      stringr_1.5.1     
##  [5] dplyr_1.1.4        purrr_1.0.2        readr_2.1.5        tidyr_1.3.1       
##  [9] tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0    SCpubr_2.0.2      
## [13] Seurat_5.1.0       SeuratObject_5.0.2 sp_2.1-4          
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3       rstudioapi_0.16.0        jsonlite_1.8.8          
##   [4] magrittr_2.0.3           spatstat.utils_3.1-0     farver_2.1.2            
##   [7] rmarkdown_2.28           DElegate_1.2.1           fs_1.6.4                
##  [10] vctrs_0.6.5              ROCR_1.0-11              spatstat.explore_3.3-2  
##  [13] htmltools_0.5.8.1        gridGraphics_0.5-1       sass_0.4.9              
##  [16] sctransform_0.4.1        parallelly_1.38.0        KernSmooth_2.23-24      
##  [19] bslib_0.8.0              htmlwidgets_1.6.4        ica_1.0-3               
##  [22] plyr_1.8.9               plotly_4.10.4            zoo_1.8-12              
##  [25] cachem_1.1.0             igraph_2.0.3             mime_0.12               
##  [28] lifecycle_1.0.4          pkgconfig_2.0.3          Matrix_1.7-0            
##  [31] R6_2.5.1                 fastmap_1.2.0            MatrixGenerics_1.16.0   
##  [34] fitdistrplus_1.2-1       future_1.34.0            shiny_1.9.1             
##  [37] digest_0.6.37            colorspace_2.1-1         rprojroot_2.0.4         
##  [40] tensor_1.5               RSpectra_0.16-2          irlba_2.3.5.1           
##  [43] crosstalk_1.2.1          labeling_0.4.3           progressr_0.14.0        
##  [46] fansi_1.0.6              spatstat.sparse_3.1-0    timechange_0.3.0        
##  [49] httr_1.4.7               polyclip_1.10-7          abind_1.4-5             
##  [52] compiler_4.4.0           bit64_4.0.5              withr_3.0.1             
##  [55] viridis_0.6.5            fastDummies_1.7.4        highr_0.11              
##  [58] MASS_7.3-61              tools_4.4.0              lmtest_0.9-40           
##  [61] httpuv_1.6.15            future.apply_1.11.2      goftest_1.2-3           
##  [64] glue_1.7.0               nlme_3.1-166             promises_1.3.0          
##  [67] grid_4.4.0               Rtsne_0.17               cluster_2.1.6           
##  [70] reshape2_1.4.4           generics_0.1.3           gtable_0.3.5            
##  [73] spatstat.data_3.1-2      tzdb_0.4.0               data.table_1.16.0       
##  [76] hms_1.1.3                utf8_1.2.4               spatstat.geom_3.3-2     
##  [79] RcppAnnoy_0.0.22         ggrepel_0.9.5            RANN_2.6.2              
##  [82] pillar_1.9.0             vroom_1.6.5              limma_3.60.4            
##  [85] yulab.utils_0.1.7        spam_2.10-0              RcppHNSW_0.6.0          
##  [88] later_1.3.2              splines_4.4.0            lattice_0.22-6          
##  [91] bit_4.0.5                survival_3.7-0           deldir_2.0-4            
##  [94] tidyselect_1.2.1         locfit_1.5-9.10          miniUI_0.1.1.1          
##  [97] pbapply_1.7-2            knitr_1.48               gridExtra_2.3           
## [100] edgeR_4.2.1              scattermore_1.2          xfun_0.47               
## [103] statmod_1.5.0            matrixStats_1.3.0        DT_0.33                 
## [106] stringi_1.8.4            lazyeval_0.2.2           yaml_2.3.10             
## [109] evaluate_0.24.0          codetools_0.2-20         ggplotify_0.1.2         
## [112] cli_3.6.3                uwot_0.2.2               xtable_1.8-4            
## [115] reticulate_1.38.0        munsell_0.5.1            jquerylib_0.1.4         
## [118] Rcpp_1.0.13              globals_0.16.3           spatstat.random_3.3-1   
## [121] png_0.1-8                spatstat.univar_3.0-0    parallel_4.4.0          
## [124] assertthat_0.2.1         dotCall64_1.1-1          sparseMatrixStats_1.16.0
## [127] listenv_0.9.1            viridisLite_0.4.2        scales_1.3.0            
## [130] ggridges_0.5.6           crayon_1.5.3             leiden_0.4.3.1          
## [133] rlang_1.1.4              cowplot_1.1.3